{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github"
      },
      "source": [
        "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/camenduru/text-to-video-synthesis-colab/blob/main/text_to_video_synthesis.ipynb)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "n2ibl97e2yM4"
      },
      "outputs": [],
      "source": [
        "!apt -y install -qq aria2\n",
        "!pip install -q torch==1.13.1+cu116 torchvision==0.14.1+cu116 torchaudio==0.13.1 torchtext==0.14.1 torchdata==0.5.1 --extra-index-url https://download.pytorch.org/whl/cu116 -U\n",
        "!pip install pandas-gbq==0.18.1 -U\n",
        "# !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/camenduru/text-to-video-synthesis/resolve/main/hub/damo/text-to-video-synthesis/VQGAN_autoencoder.pth -d /content/models -o VQGAN_autoencoder.pth\n",
        "# !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/camenduru/text-to-video-synthesis/resolve/main/hub/damo/text-to-video-synthesis/open_clip_pytorch_model.bin -d /content/models -o open_clip_pytorch_model.bin\n",
        "# !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/camenduru/text-to-video-synthesis/resolve/main/hub/damo/text-to-video-synthesis/text2video_pytorch_model.pth -d /content/models -o text2video_pytorch_model.pth\n",
        "# !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/camenduru/text-to-video-synthesis/raw/main/hub/damo/text-to-video-synthesis/configuration.json -d /content/models -o configuration.json\n",
        "\n",
        "# from https://huggingface.co/kabachuha/modelscope-damo-text2video-pruned-weights\n",
        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/kabachuha/modelscope-damo-text2video-pruned-weights/resolve/main/VQGAN_autoencoder.pth -d /content/models -o VQGAN_autoencoder.pth\n",
        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/kabachuha/modelscope-damo-text2video-pruned-weights/resolve/main/open_clip_pytorch_model.bin -d /content/models -o open_clip_pytorch_model.bin\n",
        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/kabachuha/modelscope-damo-text2video-pruned-weights/resolve/main/text2video_pytorch_model.pth -d /content/models -o text2video_pytorch_model.pth\n",
        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/kabachuha/modelscope-damo-text2video-pruned-weights/raw/main/configuration.json -d /content/models -o configuration.json\n",
        "\n",
        "!pip install -q open_clip_torch pytorch_lightning \n",
        "!pip install -q git+https://github.com/camenduru/modelscope\n",
        "!sed -i -e 's/\\\"tiny_gpu\\\": 1/\\\"tiny_gpu\\\": 0/g' /content/models/configuration.json\n",
        "\n",
        "import os\n",
        "os._exit(0)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "cqo7g4p2_V78"
      },
      "outputs": [],
      "source": [
        "import torch, random, gc\n",
        "from modelscope.pipelines import pipeline\n",
        "from modelscope.outputs import OutputKeys\n",
        "\n",
        "torch.manual_seed(random.randint(0, 2147483647))\n",
        "pipe = pipeline('text-to-video-synthesis', '/content/models')\n",
        "\n",
        "!mkdir /content/videos"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "import gc\n",
        "import datetime\n",
        "from IPython.display import HTML\n",
        "\n",
        "with torch.no_grad():\n",
        "  torch.cuda.empty_cache()\n",
        "gc.collect()\n",
        "\n",
        "test_text = {\n",
        "        'text': 'A panda eating bamboo on a rock.',\n",
        "    }\n",
        "output_video_path = pipe(test_text,)[OutputKeys.OUTPUT_VIDEO]\n",
        "\n",
        "new_video_path = f'/content/videos/{datetime.datetime.now().strftime(\"%Y-%m-%d_%H:%M:%S\")}.mp4'\n",
        "!ffmpeg -y -i {output_video_path} -c:v libx264 -c:a aac -strict -2 {new_video_path} >/dev/null 2>&1\n",
        "\n",
        "print(output_video_path, '->', new_video_path)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "from IPython.display import HTML\n",
        "from base64 import b64encode\n",
        "\n",
        "!cp {new_video_path} /content/videos/tmp.mp4\n",
        "mp4 = open('/content/videos/tmp.mp4','rb').read()\n",
        "\n",
        "decoded_vid = \"data:video/mp4;base64,\" + b64encode(mp4).decode()\n",
        "HTML(f'<video width=400 controls><source src=\"{decoded_vid}\" type=\"video/mp4\"></video>')"
      ]
    }
  ],
  "metadata": {
    "accelerator": "GPU",
    "colab": {
      "provenance": []
    },
    "gpuClass": "standard",
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}
